
//Inputs:

//"$output/imports2007_lfttd.dta" //created by SAS_03_pull_trade_data.sas
//"$input/country_data_unique.dta"
//"$output/firm_2007.dta" // Stata_01_Make_census_firm_dataset_v04.do
//Outputs

//"$df_output/missing_countries.dta"
//"$output/firm_country_import.dta"

//Notes: 
/************************************
****This program pulls in the import data and the firm input data to 
****construct a dataset of with firm sourcing by country

*this version is going to flag import firmids that don't match to the lbd data. Then it will 
*use the import micro data to pull any alternative firmids for those importers and try to match them to the 
*unmatched lbd firm data

*v03: changed to use the lfttd data
************************************
**************************************************/


**Set directories
cd ""  /* PROJECT ROOT FOLDER */
global data "data"

set more off

cap program drop clean_imports
program define clean_imports
  syntax , year(integer)

  use "$data/imports`year'_lfttd.dta", clear

	if `year' == 1997 | `year' == 2002 {
	ren COUNTRY country
	ren HS hs
}

  *Drop mineral imports/exports
  drop if substr(hs,1,2)=="27"  // 27 is the HS2 code for minerals -- this is public data from the FTD website.
  
**# Bookmark #1
  *Count number of products
  gen hs10_count=1

  *Collapse to firm-country level
  collapse (sum) imp_value rp_imp_value hs10_count, by(firmid country) fast


  *Merge to bea country codes to collapse some of the super detailed countries
  rename country code_TD
    merge m:1 code_TD using "$data/country_data_unique.dta", keepusing(code_TD countrycode country_name iso3)
    drop _merge
    rename countrycode ctry
    drop if firmid==""
    
  *ouptut dataset of imports for which we don't have bea data
    preserve
      gen ctry_in_bea="yes" if ctry~=.
      replace ctry_in_bea="no" if ctry==.
      collapse (sum) imp_val, by(code_TD iso ctry_in_bea)
      egen tot=sum(imp_val)
      gen share=imp/tot
      save "$data/missing_countries.dta", replace   
    restore 
      
  *Collapse to BEA aggregated country names
    collapse (sum) imp_value rp_imp_value, by(firmid ctry country_name)

  *Merge to the firm data 
  merge m:1 firmid using "$data/firm_`year'.dta", keepusing(firmid emp_man)
  gen man=1 if emp_man>0 & emp_man~=.
  drop if _merge==2
    *Assess import match rates
    ******************************************
    preserve
      gen firm_match="yes" if _merge==3
	replace firm_match="no" if _merge==1 & ~missing(firmid)
	replace firm_match="no firmid" if firmid=="unmatched"
      gen firms=1
    replace imp_value=0 if imp_value==.
      gen importers=1 if imp_value>0
      collapse (sum) imp_value  firms importers , by(firm_match)
      foreach vv in imp_value  firms  importers {
	egen tot_`vv'=sum(`vv')
	gen share_`vv'=`vv'/tot_`vv'
	}
    export excel firm_match share* firms  using $data/import_lbd_mergestats_v02.xls, sheetreplace sheet(match_agg_imptf1_HR_`year') ///
      firstrow(varlabels)
    restore  
    
    preserve
      gen firm_match="yes" if _merge==3
	replace firm_match="no" if _merge==1 & ~missing(firmid)
	replace firm_match="no firmid" if firmid=="unmatched"
      gen firms=1
    replace imp_value=0 if imp_value==.
      gen importers=1 if imp_value>0
      gen imp_value_man = imp_value if man
      replace imp_value_man=0 if imp_value==. & man
      gen importers_man=1 if imp_value>0 & man
      collapse (sum) imp_value  firms importers imp_value_man importers_man man , by( ctry country_name firm_match)
      foreach vv in imp_value  firms  importers {
	egen tot_`vv'=sum(`vv')
	gen share_`vv'=`vv'/tot_`vv'
	}
      foreach vv in  imp_value_man importers_man man  {
	egen tot_`vv'=sum(`vv')
	gen share_`vv'=`vv'/tot_`vv'
	}
    export excel country_name ctry firm_match share* firms man using $data/import_lbd_mergestats_v02.xls, sheetreplace sheet(match_ctry_imptf1_HR`year') ///
      firstrow(varlabels)
    restore  
    ************************************************

  *drop imports that didn't merge
  drop if _merge==1
  drop _merge

  drop emp_man man
  save "$data/firm_country_import`year'.dta", replace 
  

end //end clean_census program

*clean_imports, year(1997)
*clean_imports, year(2002)
clean_imports, year(2007)
*clean_imports, year(2012)



